# Fix the RNG seed so the random tie-breaks drawn with sample() in
# get_fav_party() are reproducible from run to run.
set.seed(2019)
library(yaml)
# Global configuration. The functions below read path entries from
# CONFIG$source and CONFIG$build.
CONFIG <- yaml.load_file('config_global.yaml')

# Shared helper code located via CONFIG$source$lib.
# NOTE(review): calc_partisan_affect_polarization(), `%>%`, filter() and
# mutate() are used below but are neither defined nor attached in this file;
# presumably library.r provides them -- confirm.
source(sprintf("%s/library.r", CONFIG$source$lib))

# Entry point of the script.
#
# Stacks the per-country tables returned by generate_measures(), keeps the
# rows that have a baseline polarization value, re-labels the rows coming from
# the "cses" file by the country prefix embedded in their `years` value, and
# writes three CSV files under CONFIG$build$descriptive:
#   cses_only.csv    - the "cses" rows before any re-labelling
#   data.csv         - the full table (row names included)
#   data_rounded.csv - years / country / rounded baseline measure, without the
#                      "_alt" series and without row names
main <- function() {
  country_names <- c("australia", "britain", "canada", "denmark", "germany", "japan", "france", "new_zealand",
                     "norway", "sweden", "switzerland", "united_states", "germany_alt",
                     "switzerland_alt", "united_states_alt", "cses")

  # Stack the country tables one after the other, starting from NULL
  measures <- NULL
  for (country_name in country_names) {
    measures <- rbind(measures, generate_measures(country_name))
  }

  # Only rows with a baseline measure are kept
  has_baseline <- !is.na(measures$partisan_affect_polarization)
  measures     <- measures[has_baseline, ]

  cses_path <- sprintf("%s/cses_only.csv", CONFIG$build$descriptive)
  write.csv(measures %>% filter(country == "cses"), cses_path)

  # Replace with CSES and drop
  # Copy of the Switzerland_2003 row(s), re-labelled as the "switzerland_alt"
  # series; it is appended back after the re-labelling below.
  swiss_copy         <- measures[grepl("Switzerland_2003", measures$years), ]
  swiss_copy$country <- "switzerland_alt"
  swiss_copy$years   <- "2003"

  levels(measures$country) <- c(levels(measures$country), "japan")

  # `years` is not modified until the gsub() below, so the three masks can be
  # computed up front.
  is_france     <- grepl("France", measures$years)
  is_japan      <- grepl("Japan", measures$years)
  is_swiss_2003 <- grepl("Switzerland_2003", measures$years)
  measures$country[is_france]     <- "france"
  measures$country[is_japan]      <- "japan"
  measures$country[is_swiss_2003] <- "switzerland"

  # Strip the country prefixes so only the year text remains
  measures$years <- gsub("France_|Japan_|Switzerland_", "", measures$years)
  measures[nrow(measures) + 1, ] <- swiss_copy

  # Two passes: by years first, then by country (order() keeps ties in their
  # existing order, so rows end up sorted by country and, within it, by years)
  measures <- measures[order(measures$years), ]
  measures <- measures[order(measures$country), ]

  # Any row still labelled "cses" is discarded
  not_cses       <- measures$country != "cses"
  measures       <- measures[not_cses, ]
  measures$years <- as.numeric(as.character(measures$years))

  write.csv(measures, sprintf("%s/data.csv", CONFIG$build$descriptive))

  # For distribution
  public_cols <- c("years", "country", "partisan_affect_polarization")
  public      <- measures[!grepl("_alt", measures$country), public_cols]
  public$partisan_affect_polarization <- round(public$partisan_affect_polarization, digits = 2)
  write.csv(public, sprintf("%s/data_rounded.csv", CONFIG$build$descriptive), row.names = FALSE)
}

# Build the table of affective-polarization measures for one country, one row
# per survey year.
#
# Args:
#   country_name: base name of the prepared CSV under
#                 CONFIG$build$prepare_data. "cses" skips the question-wording
#                 merge and gets fixed `scaling` / `question_wording` values.
#
# Returns:
#   Data frame with `years`, `country`, the wording columns, the measure
#   columns added by add_affect() for the baseline and the "_top_two", "_bin",
#   "_data" and "_leaner" variants, plus `top_2_parties` and `has_leaner`.
#
# Side effect:
#   Writes top_2_<country_name>.csv under CONFIG$build$descriptive.
generate_measures <- function(country_name){
  
  orig_data  <- read.csv(sprintf("%s/%s.csv", CONFIG$build$prepare_data, country_name))
  years      <- unique(orig_data$year) 
  db         <- data.frame(years)
  db$country <- country_name 
  
  if (country_name != "cses"){
    country_questions <- read.csv(sprintf('%s/question_wordings.csv', CONFIG$source$descriptive))
    db                <- merge(db, country_questions)
    
    # merge() may sort, drop or repeat rows. Everything below writes to row i
    # of `db` for years[i], so `years` must mirror the merged row order.
    missing_years <- setdiff(years, db$years)
    if (length(missing_years) > 0){
      warning(sprintf("%s: year(s) %s have no match in question_wordings.csv and are skipped",
                      country_name, paste(missing_years, collapse = ", ")),
              call. = FALSE)
    }
    years <- db$years
  } else {
    db$scaling          <- "(0-10)"
    db$question_wording <- "like"
  }
  
  # Top 2 parties by year
  # Weighted totals per year, split by (a) whether a party is recorded and
  # (b) the party itself.
  affil_years           <- aggregate(orig_data$weight, by = list(orig_data$year, !is.na(orig_data$party)), FUN = sum, na.rm = TRUE)
  colnames(affil_years) <- c("year", "has_party", "total")
  party_years           <- aggregate(orig_data$weight, by = list(orig_data$year, orig_data$party),         FUN = sum, na.rm = TRUE)
  colnames(party_years) <- c("year", "party", "total")
  top_2 <- c()
  for (y in unique(party_years$year)){
    aff_tot  <- sum(affil_years[affil_years$year == y, "total"])
    pty_tot  <- sum(party_years[party_years$year == y, "total"])
    
    # Two largest parties of the year by weighted total
    temp     <- party_years[party_years$year == y, ]
    temp     <- temp[order(temp$total, decreasing = TRUE), ]
    temp     <- temp[1:2, ]
    
    has_party <- affil_years[affil_years$year == y & affil_years$has_party, "total"] / aff_tot
    # Row layout: year, the two parties sorted by party id, their joint share
    # of the party total, share of weight with a recorded party
    top_2     <- rbind(top_2, 
                       c(y, temp[order(temp[, "party"]), "party"], sum(temp[, "total"]) / pty_tot, has_party))
  }
  colnames(top_2) <- c("year", "top_party_1", "top_party_2", "share_top_2", "share_has_party")
  write.csv(top_2, sprintf("%s/top_2_%s.csv", CONFIG$build$descriptive, country_name), row.names = FALSE)
  
  # Calc Affective Polarization
  for (i in seq_along(years)){
    year_data <- orig_data[which(orig_data$year == years[i]), ]
    
    # Baseline Affect
    db <- add_affect(db, year_data, years, i)
    
    # Top two parties
    # A year without any recorded party has no row in top_2. The mask then
    # keeps only party / weight / year, so add_affect() records NA instead of
    # failing on a zero-column data frame.
    top_row       <- which(top_2[, 1] == years[i])
    has_top_2     <- length(top_row) > 0
    top_2_columns <- colnames(year_data) %in%
      c(sprintf("feel_party%s", top_2[top_row, 2:3]), "party", "weight", "year")
    db            <- add_affect(db, year_data[, top_2_columns, drop = FALSE], years, i, sfx = "_top_two")
    db[["top_2_parties"]][i] <- if (has_top_2){
      sprintf("%s:%s", top_2[top_row, 2], top_2[top_row, 3])
    } else {
      "NA"
    }
    
    # Bin by 25
    temp <- year_data
    temp[, grepl("feel_party", colnames(temp))] <- round(temp[, grepl("feel_party", colnames(temp))] / 25) * 25
    db <- add_affect(db, temp, years, i, sfx = "_bin")
    
    # Data driven
    # Party is replaced by the respondent's highest-rated party.
    # seq_len() keeps an empty year from iterating over 1:0.
    temp       <- year_data
    temp$party <- vapply(seq_len(nrow(temp)), function(x) get_fav_party(temp[x, ]), numeric(1))
    db         <- add_affect(db, temp, years, i, sfx = "_data")
    
    # Leaner
    # Respondents without a party take the value of their `leaner` column
    temp <- year_data
    temp <- temp %>% as.data.frame() %>% mutate(party = ifelse(is.na(party), leaner, party))
    db   <- add_affect(db, temp, years, i, sfx = "_leaner")
    db[i, "has_leaner"] <- if (sum(temp$has_leaner) > 0) TRUE else FALSE
  }
  
  return(db)
}

# Compute the affective-polarization measures for one year and store them in
# row i of `db`.
#
# Args:
#   db:        data frame receiving the results.
#   year_data: respondents of a single year; uses the `party` column and the
#              `feel_party<id>` columns.
#   years:     not used by this function; kept so existing calls still work.
#   i:         row of `db` to fill.
#   sfx:       suffix appended to every column name written.
#
# Returns:
#   `db` with partisan_affect_polarization, ..._in, ..._out, ..._sd,
#   affect_sample_sd and non_na_affect (each followed by `sfx`) set at row i.
#   The five measures are NA when fewer than two parties remain.
#
# calc_partisan_affect_polarization() is defined outside this file.
add_affect <- function(db, year_data, years, i, sfx = ""){
  
  # Create column for each individual's feelings towards own party
  # Values are read straight from the matching feel_party column. Indexing the
  # whole data frame with a matrix would go through as.matrix(), which passes
  # numeric columns through format() when any column is non-numeric.
  own_col  <- match(sprintf("feel_party%s", year_data$party), colnames(year_data))
  own_feel <- rep(NA_real_, nrow(year_data))
  for (j in unique(own_col[!is.na(own_col)])){
    rows           <- which(own_col == j)
    values         <- year_data[[j]][rows]
    own_feel[rows] <- if (is.numeric(values)) values else as.numeric(as.character(values))
  }
  year_data$own_party_feel <- own_feel
  
  # Iteratively define N and P
  non_na_affect <- as.numeric(gsub("feel_party", "", colnames(year_data)[grepl("feel_party", colnames(year_data))]))
  numb_resp     <- 1e10
  numb_parties  <- 1e10
  
  # drop = FALSE keeps a data frame even when a single column is selected, so
  # rowSums() / colSums() still work once only one party is left.
  drop_individuals <- function(year_data, non_na_affect){
    year_data <- year_data[year_data$party %in% non_na_affect & !is.na(year_data$party), ] # Keep respondents with valid party
    feel      <- year_data[, sprintf("feel_party%s", non_na_affect), drop = FALSE]
    year_data[!is.na(year_data$own_party_feel) & rowSums(!is.na(feel)) >= 2, ] # Keep respondents with at least two affect values
  }
  get_non_na_affect <- function(year_data){
    feel    <- year_data[, grepl("feel_party", colnames(year_data)), drop = FALSE]
    rated   <- names(which(colSums(is.na(feel)) != nrow(year_data))) # Columns that are not entirely NA
    parties <- as.numeric(gsub("feel_party", "", rated))
    parties[parties %in% unique(year_data$party)]                    # ...and that still have respondents
  }
  
  while (nrow(year_data) < numb_resp || length(non_na_affect) < numb_parties){
    # Iterate until N and P are stable
    numb_resp     <- nrow(year_data)
    numb_parties  <- length(non_na_affect)
    
    year_data     <- drop_individuals(year_data, non_na_affect)
    non_na_affect <- get_non_na_affect(year_data)
  }
  
  # Affective polarization
  col <- function(stem) sprintf("%s%s", stem, sfx)
  if (length(non_na_affect) >= 2){
    out <- calc_partisan_affect_polarization(year_data, non_na_affect)
    db[[col("partisan_affect_polarization")]][i]     <- out$affect
    db[[col("partisan_affect_polarization_in")]][i]  <- out$affect_in
    db[[col("partisan_affect_polarization_out")]][i] <- out$affect_out
    db[[col("partisan_affect_polarization_sd")]][i]  <- out$affect_sd
    db[[col("affect_sample_sd")]][i]                 <- out$sample_sd
  } else {
    # This order reproduces the column creation order of the former chained
    # assignment, so the column layout of `db` stays the same.
    na_stems <- c("partisan_affect_polarization_sd", "affect_sample_sd",
                  "partisan_affect_polarization_out", "partisan_affect_polarization_in",
                  "partisan_affect_polarization")
    for (stem in na_stems){
      db[[col(stem)]][i] <- NA
    }
  }
  db[[col("non_na_affect")]][i] <- length(non_na_affect)
  
  return(db)
}

# Party a respondent rates highest.
#
# Args:
#   row: one-row data frame holding the respondent's `feel_party<id>` columns
#        (other columns are ignored).
#
# Returns:
#   The numeric <id> taken from the name of the highest-rated feel_party
#   column, so the value can be used as `party` and matched against
#   "feel_party<id>". Ties are broken at random with sample(). NA when there
#   is no feel_party column or every rating is NA.
get_fav_party <- function(row){
  feel_cols <- grep("feel_party", colnames(row), value = TRUE)
  if (length(feel_cols) == 0){
    return(NA)
  }
  
  ratings <- unlist(row[1, feel_cols, drop = FALSE], use.names = FALSE)
  if (all(is.na(ratings))){
    return(NA) # Avoids max() returning -Inf with a warning
  }
  
  best <- which(ratings == max(ratings, na.rm = TRUE))
  if (length(best) > 1){
    best <- sample(best, 1) # Same single draw as before, so seeded runs are unchanged
  }
  
  # Return the id in the column name, not the position among the columns: the
  # two differ as soon as the ids are not 1..K in column order.
  as.numeric(gsub("feel_party", "", feel_cols[best]))
}

# Run the whole pipeline when this script is executed or sourced.
main()
